Volume 2: The Fourier Transform.

David Morley
BYU Vol 1
11/29/18

Part 1: The Discrete Fourier Transform

In [1]:
import IPython
from matplotlib import pyplot as plt
import numpy as np
from scipy.io import wavfile
from scipy import fftpack
import time
from scipy import signal
import imageio
In [2]:
plt.rcParams["figure.dpi"] = 300             # Fix plot quality.
plt.rcParams["figure.figsize"] = (12,3)      # Change plot size / aspect (you may adjust this).
In [40]:
class SoundWave(object):
    """A class for working with digital audio signals."""

    # Problem 1.1
    def __init__(self, rate, samples):
        """Set the SoundWave class attributes.

        Parameters:
            rate (int): The sample rate of the sound.
            samples ((n,) ndarray): NumPy array of samples.
        """
        # Store each input as an attribute
        self.rate = rate
        self.samples = samples

    # Problems 1.1 and 1.7
    def plot(self,dft=False):
        """Plot the graph of the sound wave (time versus amplitude)."""
        # Save the number of samples in the set
        n = len(self.samples)        
        if dft:            
            plt.subplot(121)              # First subplot
            v = np.arange(n) * self.rate / n # Compute v in hertz
            frequencies = fftpack.fft(self.samples) # Take dft of samples
            plt.plot(v[:n//2],abs(frequencies[:n//2]*self.rate/n))
            plt.title("Frequencies vs Magnitude")
            plt.xlabel("Frequency (Hz)")  # Label axis
            plt.ylabel("Magnitude")            
            plt.xlim(left=0)
            plt.subplot(122)              # Change to 2nd subplot

        # Calculate the number of seconds in the sample
        num_seconds = n/self.rate
        y = np.linspace(0, num_seconds, n)
        plt.plot(y, self.samples, lw=.5)  # Plot samples vs time
        plt.title("Samples vs Time")
        plt.ylim(-32768, 32767)           # Use the given limits
        plt.xlabel("Time (Seconds)")      # Label axis
        plt.ylabel("Samples")  
        plt.show()                        # And show!
        
    # Problem 1.2
    def export(self, filename, force=False):
        """Generate a wav file from the sample rate and samples. 
        If the array of samples is not of type np.int16, scale it before exporting.

        Parameters:
            filename (str): The name of the wav file to export the sound to.
        """
        if type(samples) is not np.int16 or force:
            # Take only the real component
            real = np.real(self.samples)
            # Scale the samples
            scaled = np.int16((real * 32767.) / max(abs(real)))
            # Write to the file
            wavfile.write(filename, self.rate, scaled)
        else:
            wavfile.write(filename, self.rate, self.samples)
    
    # Problem 1.4
    def __add__(self, other):
        """Combine the samples from two SoundWave objects.

        Parameters:
            other (SoundWave): An object containing the samples to add
                to the samples contained in this object.
        
        Returns:
            (SoundWave): A new SoundWave instance with the combined samples.

        Raises:
            ValueError: if the two sample arrays are not the same length.
        """
        # Check that rates are matching
        if self.rate != other.rate:
            raise ValueError("A and B are not the same length")
        # Add together the samples to generate new sound
        sums = self.samples + other.samples
        # Return new sound as a SoundWave object
        return SoundWave(self.rate, sums)

    # Problem 1.4
    def __rshift__(self, other):
        """Concatentate the samples from two SoundWave objects.

        Parameters:
            other (SoundWave): An object containing the samples to concatenate
                to the samples contained in this object.

        Raises:
            ValueError: if the two sample rates are not equal.
        """
        # Check that rates are matching
        if self.rate != other.rate:
            raise ValueError("A and B are not the same length")
        # Concatenate  the samples to generate new sound
        concat = np.concatenate((self.samples, other.samples))
        # Return new sound as a SoundWave object
        return SoundWave(self.rate, concat)
    
    # Problem 2.1
    def __mul__(self, other):
        """Convolve the samples from two SoundWave objects using circular convolution.
        
        Parameters:
            other (SoundWave): An object containing the samples to convolve
                with the samples contained in this object.
        
        Returns:
            (SoundWave): A new SoundWave instance with the convolved samples.

        Raises:
            ValueError: if the two sample rates are not equal.
        """
        n = len(self.samples)
        m = len(other.samples)        
        # Check that rates are matching
        if self.rate != other.rate:
            raise ValueError("A and B are not the same length")  
        # Pad smaller array
        if n < m:
            pad = np.zeros(m-n)
            self.samples = np.concatenate([self.samples, pad])
        else:
            pad = np.zeros(n-m)
            other.samples = np.concatenate([other.samples, pad])    
        # Use 7.4 to compute convolution
        convolve = fftpack.ifft(fftpack.fft(self.samples)*fftpack.fft(other.samples)).real
        return SoundWave(self.rate, convolve)

    # Problem 2.2
    def __pow__(self, other):
        """Convolve the samples from two SoundWave objects using linear convolution.
        
        Parameters:
            other (SoundWave): An object containing the samples to convolve
                with the samples contained in this object.
        
        Returns:
            (SoundWave): A new SoundWave instance with the convolved samples.

        Raises:
            ValueError: if the two sample rates are not equal.
        """
        # Check that rates are matching
        if self.rate != other.rate:
            raise ValueError("A and B are not the same length")  
        # Store lengths of sample vectors
        n = len(self.samples)
        m = len(other.samples)
        # Set a to be the smallest value such that 2**a >= n + m - 1
        a = int(np.ceil(np.log2(n+m-1)))
        # Initialize zero vectors for padding
        selfNew = np.zeros(2**a)
        otherNew = np.zeros(2**a)
        # Use fancy indexing to 'append' zeros
        selfNew[:n] = self.samples
        otherNew[:m] = other.samples
        # Compute the convolution
        convolve = fftpack.ifft(fftpack.fft(selfNew)*fftpack.fft(otherNew))[:n+m-1].real
        return SoundWave(self.rate, convolve)        
        

    # Problem 2.4
    def clean(self, low_freq, high_freq):
        """Remove a range of frequencies from the samples using the DFT. 

        Parameters:
            low_freq (float): Lower bound of the frequency range to zero out.
            high_freq (float): Higher boound of the frequency range to zero out.
        """
        # Initialize useful values
        n = len(self.samples) 
        k_high = int(round(high_freq * n / self.rate))
        k_low = int(round(low_freq * n / self.rate))
        # Compute the dft of the samples
        samples = self.samples.copy()
        dft = fftpack.fft(self.samples)
        # Set outlying frequencies to zero
        dft[k_low:k_high] = 0
        dft[n-k_high:n-k_low] = 0
                
        return SoundWave(self.rate, fftpack.ifft(dft))    
                

Problem 1.1

  • Implement SoundWave.__init__().
  • Implement SoundWave.plot().
  • Use the scipy.io.wavefile.read() and the SoundWave class to plot tada.wav.
In [4]:
# Read in the soundfile saving the rate and samples
rate, samples = wavfile.read("tada.wav")
# Generate a Soundwave class object
tada=SoundWave(rate, samples)
# Use the plot function
tada.plot()

Problem 1.2

  • Implement SoundWave.export().
  • Use the export() method to create two new files containing the same sound as tada.wav: one without scaling, and one with scaling (use force=True).
  • Use IPython.display.Audio() to embed the original and new versions of tada.wav in the notebook.
In [5]:
# Export the unscaled file
tada.export("tada_unscaled.wav")
# Export the scaled file
tada.export("tada_scaled.wav", force=True)
IPython.display.Audio("tada.wav")
Out[5]:
In [6]:
IPython.display.Audio("tada_unscaled.wav")
Out[6]:
In [7]:
IPython.display.Audio("tada_scaled.wav")
Out[7]:

Problem 1.3

  • Implement generate_note().
  • Use generate_note() to create an A tone that lasts for two seconds. Embed it in the notebook.
In [8]:
def generate_note(frequency, duration):
    """Generate an instance of the SoundWave class corresponding to 
    the desired soundwave. Uses sample rate of 44100 Hz.
    
    Parameters:
        frequency (float): The frequency of the desired sound.
        duration (float): The length of the desired sound in seconds.
    
    Returns:
        sound (SoundWave): An instance of the SoundWave class.
    """
    r = 44100
    # Create an array to represent time
    t = np.linspace(0, duration, r * duration)
    # Create a SoundWave instance containing a tone with frequency k that lasts for s seconds.
    G = SoundWave(r, np.sin(2 * np.pi * t * frequency))
    return G
In [9]:
# Generate the A tone
A = generate_note(440, 2).samples
# Play computes product
IPython.display.Audio(rate=44100, data=A)
Out[9]:

Problem 1.4

  • Implement SoundWave.__add__().
  • Generate a three-second A minor chord (A, C, and E) and embed it in the notebook.
  • Implement SoundWave.__rshift__().
  • Generate the arpeggio A$\,\rightarrow\,$C$\,\rightarrow\,$E, where each tone lasts one second, and embed it in the notebook.
In [10]:
# Generate notes for chord
A = generate_note(440, 3)
C = generate_note(523.25, 3)
E = generate_note(659.25, 3)
# Add notes to make chord
Aminor = A + E + C
# Play computed product
IPython.display.Audio(rate=44100, data=Aminor.samples)
Out[10]:
In [11]:
# Generate notes for arpeggio
A = generate_note(440, 1)
C = generate_note(523.25, 1)
E = generate_note(659.25, 1)
# Right shift notes to make arpeggio
arpeggio = A >> C >> E
# Play computed product
IPython.display.Audio(rate=44100, data=arpeggio.samples)
Out[11]:

Problem 1.5

  • Implement simple_dft() with the formula for $c_k$ given below.
  • Use np.allclose() to check that simple_dft() and scipy.fftpack.fft() give the same result (after scaling).
$$ c_k = \frac{1}{n}\sum_{\ell=0}^{n-1} f_\ell e^{-2 \pi i k \ell\, /\, n} $$
In [12]:
def simple_dft(samples):
    """Compute the DFT of an array of samples.

    Parameters:
        samples ((n,) ndarray): an array of samples.
    
    Returns:
        ((n,) ndarray): The DFT of the given array.
    """
    # Initialize size of input vector
    n = len(samples)
    # Create a vector of values of k
    k = np.arange(n).reshape(n,1)
    # Use formula and aray broadcasting to create W
    W = np.exp(-2*np.pi*1j/n * k @ k.T)
    # Return dft
    return np.array(W @ samples / n)
In [13]:
# Choose size of test vector
n = 4
# Create test vector
test_array = np.random.random(n)
# Compute DFTs
myC = 4 * simple_dft(test_array)
scipyC = fftpack.fft(test_array)
# Compare sizes
np.allclose(myC, scipyC)
Out[13]:
True

Problem 1.6

  • Implement simple_fft().
  • Generate an array of $8192$ random samples and take its DFT using simple_dft(), simple_fft(), and scipy.fftpack.fft(). Print the runtimes of each computation.
  • Use np.allclose() to check that simple_fft() and scipy.fftpack.fft() give the same result (after scaling).
In [14]:
def simple_fft(samples, threshold=1):
    """Compute the DFT using the FFT algorithm.
    
    Parameters:
        samples ((n,) ndarray): an array of samples.
        threshold (int): when a subarray of samples has fewer
            elements than this integer, use simple_dft() to
            compute the DFT of that subarray.
    
    Returns:
        ((n,) ndarray): The DFT of the given array.
    """
    # Initialize length of sample vector
    n = len(samples)
    # If it's less than the threshold, use the simple dft
    if n <= threshold:
        return simple_dft(samples)
    else:
        # Break up sample vector by evens and odds and run function recursively
        f_even = simple_fft(samples[::2])
        f_odd = simple_fft(samples[1::2])
        # Create w vector
        w = np.exp((-2j * np.pi/n) * np.arange(n))
        # Break up output by first and second half
        first_sum = f_even + w[:n//2] * f_odd
        second_sum = f_even + w[n//2:] * f_odd
        # Return the scaled full fft
        return .5 * np.concatenate([first_sum, second_sum])
In [15]:
# Choose the number of samples to use
n = 8192
# Create the test array
test_array = np.random.random(n)
# Run the simple dft
start = time.time()
dft = n * simple_dft(test_array)
dft_time = time.time() - start
# Run the simple fft
start = time.time()
fft = n * simple_fft(test_array)
fft_time = time.time() - start
# Run the scipy implementation
start = time.time()
scipy = fftpack.fft(test_array)
scipy_time = time.time() - start
print(f"simple_dft: {dft_time} seconds\nsimple_fft: {fft_time} seconds\nscipy.fftpack.fft: {scipy_time} seconds")
np.allclose(fft, scipy)
simple_dft: 14.811764240264893 seconds
simple_fft: 0.4446523189544678 seconds
scipy.fftpack.fft: 0.0020613670349121094 seconds
Out[15]:
True

Problem 1.7

  • Modify SoundWave.plot() so that it accepts a boolean. When the boolean is True, take the DFT of the stored samples and plot (in a new subplot) the frequencies present on the $x$-axis and the magnituds of those frequences on the $y$-axis. Only the display the first half of the plot, and adjust the $x$-axis so that it correctly shows the frequencies in Hertz.
  • Display the plot of the DFT of an A tone.
  • Display the plot of the DFT of an A minor chord.
In [16]:
A.plot(True)
In [17]:
Aminor.plot(True)

Problem 1.8

Use the DFT to determine the individual notes that are present in mystery_chord.wav.

In [18]:
# Read in the soundfile saving the rate and samples
rate, samples = wavfile.read("mystery_chord.wav")
# Save the size of the sample
n = len(samples)
# Calculate the absolute value of dft keeping only first half
dft = abs(fftpack.fft(samples))[:n//2]
# Sort dft descending order and keep indices
indices = np.argsort(dft)[::-1]
k = indices[:4]
# Use (6.4) to calculate frequencies
frequencies = (k * rate / n)
frequencies
Out[18]:
array([440.  , 784.  , 523.25, 587.5 ])

The notes are...

A G C D

Part 2: Convolution and Filtering.

Problem 2.1

  • Implement SoundWave.__mul__() for circular convolution.
  • Generate 2 seconds of white noise at the same sample rate as tada.wav.
  • Compute the circular convolution of tada.wav and the white noise. Embed the result in the notebook.
  • Append the circular convolution to itself and embed the result in the notebook.
In [19]:
rate = tada.rate        # Create 2 seconds of white noise at a given rate.
white_noise = np.random.randint(-32767, 32767, rate*2, dtype=np.int16)
white = SoundWave(rate, white_noise)
# Convolve white noise and tada sound
white_convolve = white * tada
IPython.display.Audio(rate=rate, data=white_convolve.samples)
Out[19]:
In [20]:
# Append circular convolution to itself
full_track = white_convolve >> white_convolve
IPython.display.Audio(rate=rate, data=full_track.samples)
Out[20]:

Problem 2.2

  • Implement SoundWave.__pow__() for linear convolution.
  • Time the linear convolution of CGC.wav and GCG.wav using SoundWave.__pow__() and scipy.signal.fftconvolve().
  • Embed the two original sounds and their convolutions in the notebook. Check that the convolutions with SoundWave.__pow__() and scipy.signal.fftconvolve() sound the same.
In [21]:
# Read in the soundfile and generate a Soundwave class object
CGC=SoundWave(wavfile.read("CGC.wav")[0], wavfile.read("CGC.wav")[1])
GCG=SoundWave(wavfile.read("GCG.wav")[0], wavfile.read("GCG.wav")[1])
# Compute and time the linear convolution of CGC and GCG
%time cgc_convo =CGC ** GCG 
Wall time: 176 ms
In [22]:
# Compute and time the linear convolution of CGC and GCG
%time gcg_convo = signal.fftconvolve(CGC.samples, GCG.samples)
Wall time: 67.9 ms
In [23]:
# Embed CGC sound
IPython.display.Audio(rate=rate, data=CGC.samples)
Out[23]:
In [24]:
# Embed GCG sound
IPython.display.Audio(rate=rate, data=GCG.samples)
Out[24]:
In [25]:
# Embed convolution of CGC and GCG
IPython.display.Audio(rate=CGC.rate, data=cgc_convo.samples)
Out[25]:

Problem 2.3

Use SoundWave.__pow__() or scipy.signal.fftconvolve() to compute the linear convolution of chopin.wav and balloon.wav. Embed the two original sounds and their convolution in the notebook.

In [26]:
# Embed chopin sound
chopin_rate, chopin_samples = wavfile.read("chopin.wav")
IPython.display.Audio("chopin.wav")
Out[26]:
In [27]:
# Embed Balloon sound
balloon_rate, balloon_samples = wavfile.read("balloon.wav")
IPython.display.Audio("balloon.wav")
Out[27]:
In [28]:
# Compute and embed convolution of balloon and chopin
convo = signal.fftconvolve(chopin_samples, balloon_samples)
IPython.display.Audio(rate=chopin_rate, data=convo)
Out[28]:

Problem 2.4

  • Implement SoundWave.clean().
  • Clean noisy1.wav by filtering out frequencies from $1250$-$2600$ Hz. Embed the original and the cleaned versions in the notebook.
  • Clean noisy2.wav. Embed the original and the cleaned versions in the notebook.
In [43]:
# Embed first soundfile
noisy1 = SoundWave(wavfile.read("noisy1.wav")[0], wavfile.read("noisy1.wav")[1])
IPython.display.Audio("noisy1.wav")
Out[43]:
In [45]:
# Clean first sound file
clean1 = noisy1.clean(1250, 2600)
IPython.display.Audio(rate=noisy1.rate, data=clean1.samples)
c:\users\dcm96\anaconda3\lib\site-packages\IPython\lib\display.py:124: ComplexWarning: Casting complex values to real discards the imaginary part
  data = np.array(data, dtype=float)
Out[45]:
In [31]:
# Embed second sound file
noisy2 = SoundWave(wavfile.read("noisy2.wav")[0], wavfile.read("noisy2.wav")[1])
IPython.display.Audio("noisy2.wav")
Out[31]:
In [32]:
noisy2.plot(True)
In [46]:
# Clean second soundfile
clean2 = noisy2.clean(1300, 4300)
IPython.display.Audio(rate=noisy1.rate, data=clean2.samples)
c:\users\dcm96\anaconda3\lib\site-packages\IPython\lib\display.py:124: ComplexWarning: Casting complex values to real discards the imaginary part
  data = np.array(data, dtype=float)
Out[46]:

Problem 2.5

  • Clean vuvuzela.wav by filtering bad frequencies out of the left and right channels individually.
  • Recombine the left and right channels and embed the result in the notebook.
In [41]:
# Read in sound files separately to get both channels
vuvuzelaA = SoundWave(wavfile.read("vuvuzela.wav")[0], wavfile.read("vuvuzela.wav")[1][:,0])
vuvuzelaB = SoundWave(wavfile.read("vuvuzela.wav")[0], wavfile.read("vuvuzela.wav")[1][:,1])
# Embed original sound
IPython.display.Audio(rate=vuvuzelaA.rate, data=np.array([vuvuzelaA.samples,vuvuzelaB.samples]))
Out[41]:
In [35]:
vuvuzelaA.plot(True)
In [36]:
vuvuzelaB.plot(True)
In [42]:
# Clean frequencies
cleanA = vuvuzelaA.clean(200,500)
cleanB = vuvuzelaB.clean(200,500)
# Recombine channels
cleanStereo = np.vstack((cleanA.samples, cleanB.samples))
IPython.display.Audio(rate=cleanA.rate, data=cleanStereo)
c:\users\dcm96\anaconda3\lib\site-packages\IPython\lib\display.py:124: ComplexWarning: Casting complex values to real discards the imaginary part
  data = np.array(data, dtype=float)
Out[42]:

Problem 2.6

  • Clean up license_plate.png so that the year printed on the sticker in the bottom right corner of the plate is legible.
  • Display the original and cleaned images.
In [38]:
# Read in license plate image
im = imageio.imread("license_plate.png")
# Take FFT
im_dft = fftpack.fft2(im)
# Plot FFT vs Original
plt.subplot(121)
plt.imshow(np.log(np.abs(im_dft)), cmap="gray")
plt.subplot(122)
plt.imshow(im, cmap="gray")
plt.show()
c:\users\dcm96\anaconda3\lib\site-packages\scipy\fftpack\basic.py:160: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  z[index] = x
In [39]:
# Create a copy of the FFT
adj_dft = im_dft.copy()
# Fill in the spikes with the average values
mean_dft = np.mean(adj_dft[20:40,90:110])
adj_dft[31:39,98:105] = mean_dft
mean_dft = np.mean(adj_dft[170:190,320:340])
adj_dft[178:185,325:338] = mean_dft
# Show the cleaner image
plt.imshow(fftpack.ifft2(adj_dft).real, cmap="gray")
c:\users\dcm96\anaconda3\lib\site-packages\scipy\fftpack\basic.py:160: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  z[index] = x
Out[39]:
<matplotlib.image.AxesImage at 0x1748d824828>

The year on the sticker is...

13

In [ ]: